{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "Automated ML Pipeline Generator using TPOT in Python.ipynb",
      "provenance": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "widgets": {
      "application/vnd.jupyter.widget-state+json": {
        "5d8c9a299f554750aacc6ea6be35b831": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "state": {
            "_view_name": "HBoxView",
            "_dom_classes": [],
            "_model_name": "HBoxModel",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "box_style": "",
            "layout": "IPY_MODEL_c927bd71ab94408a94de40b9620b3db1",
            "_model_module": "@jupyter-widgets/controls",
            "children": [
              "IPY_MODEL_5f41c1d011a84f80833ff96b557e43da",
              "IPY_MODEL_a937739ccc00435285e57e5a972b0123"
            ]
          }
        },
        "c927bd71ab94408a94de40b9620b3db1": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "5f41c1d011a84f80833ff96b557e43da": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "state": {
            "_view_name": "ProgressView",
            "style": "IPY_MODEL_0ec7bf7cb61c44d39847460437dcf86b",
            "_dom_classes": [],
            "description": "Optimization Progress: 100%",
            "_model_name": "FloatProgressModel",
            "bar_style": "",
            "max": 600,
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": 600,
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "orientation": "horizontal",
            "min": 0,
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_85d606d6f120478cafefbc6e36c6e47e"
          }
        },
        "a937739ccc00435285e57e5a972b0123": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "state": {
            "_view_name": "HTMLView",
            "style": "IPY_MODEL_302ec6f52a2041f293e6f316fdc14b65",
            "_dom_classes": [],
            "description": "",
            "_model_name": "HTMLModel",
            "placeholder": "​",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": " 600/600 [05:02&lt;00:00,  2.32pipeline/s]",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_d802eb0044b843ee9faaaa33877a2b19"
          }
        },
        "0ec7bf7cb61c44d39847460437dcf86b": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "ProgressStyleModel",
            "description_width": "initial",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "bar_color": null,
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "85d606d6f120478cafefbc6e36c6e47e": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "302ec6f52a2041f293e6f316fdc14b65": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "DescriptionStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "d802eb0044b843ee9faaaa33877a2b19": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        }
      }
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "rcIyjXA3QWdZ",
        "colab_type": "text"
      },
      "source": [
        "## Automated ML Pipeline Generator using TPOT in Python\n",
        "+ Genetic Algorithms: based on natural selection/survival of the fittest\n",
        "\n",
        "#### Steps of Genetic Algorithms\n",
        "+ Selection: find the best and fittest\n",
        "+ Crossover: breed the best and the fittest to get a new generation\n",
        "+ Mutation: mutate the offspring of the new generation till you get the best and fittest\n",
        "\n",
        "### Pkgs\n",
        "+ pip install tpot\n",
        "\n",
        "#### Dependencies\n",
        "+ scikit learn and numpy\n",
        "\n",
        "#### NB\n",
        "+ Remove Missing Values\n",
        "+ Must be categorical and numbers\n",
        "\n",
        "\n",
        "\n"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "Jelt8N-KOFbW",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "data_url = \"https://raw.githubusercontent.com/Jcharis/Machine-Learning-Web-Apps/master/Iris-Species-Predictor-ML-Flask-App-With-Materialize.css/data/iris.csv\"\n",
        "data_url2 = \"https://raw.githubusercontent.com/Jcharis/Python-Machine-Learning/master/Predicting_Contraceptives_MethodChoice_n_Usage_with_ML_and_TPOT/data_cmc/cmc.data\""
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "6q7PZTQBP96y",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Load ML Pkgs\n",
        "from sklearn.model_selection import train_test_split\n",
        "from sklearn.linear_model import LogisticRegression\n",
        "from sklearn.ensemble import RandomForestClassifier\n",
        "from sklearn.naive_bayes import MultinomialNB"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "0G-qFr6qQ-zN",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Load EDA pkg\n",
        "import pandas as pd \n",
        "import numpy as np"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "xpm7HzZIdDXt",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Load Our dataset\n",
        "df = pd.read_csv(data_url)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "u51mvRabdDDU",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 206
        },
        "outputId": "65f40eeb-2a9e-4510-fc43-4ec209730649"
      },
      "source": [
        "df.head()"
      ],
      "execution_count": 23,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>sepal_length</th>\n",
              "      <th>sepal_width</th>\n",
              "      <th>petal_length</th>\n",
              "      <th>petal_width</th>\n",
              "      <th>species</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>5.1</td>\n",
              "      <td>3.5</td>\n",
              "      <td>1.4</td>\n",
              "      <td>0.2</td>\n",
              "      <td>setosa</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>4.9</td>\n",
              "      <td>3.0</td>\n",
              "      <td>1.4</td>\n",
              "      <td>0.2</td>\n",
              "      <td>setosa</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>4.7</td>\n",
              "      <td>3.2</td>\n",
              "      <td>1.3</td>\n",
              "      <td>0.2</td>\n",
              "      <td>setosa</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>4.6</td>\n",
              "      <td>3.1</td>\n",
              "      <td>1.5</td>\n",
              "      <td>0.2</td>\n",
              "      <td>setosa</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>5.0</td>\n",
              "      <td>3.6</td>\n",
              "      <td>1.4</td>\n",
              "      <td>0.2</td>\n",
              "      <td>setosa</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "   sepal_length  sepal_width  petal_length  petal_width species\n",
              "0           5.1          3.5           1.4          0.2  setosa\n",
              "1           4.9          3.0           1.4          0.2  setosa\n",
              "2           4.7          3.2           1.3          0.2  setosa\n",
              "3           4.6          3.1           1.5          0.2  setosa\n",
              "4           5.0          3.6           1.4          0.2  setosa"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 23
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "l0xdZsZFdCtE",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 128
        },
        "outputId": "be12c230-7953-474f-9b1f-775dad8132f4"
      },
      "source": [
        "# Checking for missing\n",
        "df.isnull().sum()"
      ],
      "execution_count": 24,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "sepal_length    0\n",
              "sepal_width     0\n",
              "petal_length    0\n",
              "petal_width     0\n",
              "species         0\n",
              "dtype: int64"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 24
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "apW0ltnedSfU",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 72
        },
        "outputId": "775b3358-4a2e-4aa4-ea0d-9f04ccb11f58"
      },
      "source": [
        "df.columns"
      ],
      "execution_count": 25,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "Index(['sepal_length', 'sepal_width', 'petal_length', 'petal_width',\n",
              "       'species'],\n",
              "      dtype='object')"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 25
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "Tql8ZnxkdWb4",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Species to Numerical\n",
        "d = {value:index for index,value in enumerate(df['species'].unique())}"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "atCQeGWgdkGi",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 35
        },
        "outputId": "e592e385-e0f8-4700-8fb5-8ac59a0d7025"
      },
      "source": [
        "d"
      ],
      "execution_count": 28,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "{'setosa': 0, 'versicolor': 1, 'virginica': 2}"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 28
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "c-52OrzOdrxH",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "df['new_label'] = df['species'].map(d)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "XpKXbfmydylL",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 206
        },
        "outputId": "933e9048-1200-4435-a52f-cd4aafdcfd84"
      },
      "source": [
        "df.head()"
      ],
      "execution_count": 30,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>sepal_length</th>\n",
              "      <th>sepal_width</th>\n",
              "      <th>petal_length</th>\n",
              "      <th>petal_width</th>\n",
              "      <th>species</th>\n",
              "      <th>new_label</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>5.1</td>\n",
              "      <td>3.5</td>\n",
              "      <td>1.4</td>\n",
              "      <td>0.2</td>\n",
              "      <td>setosa</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>4.9</td>\n",
              "      <td>3.0</td>\n",
              "      <td>1.4</td>\n",
              "      <td>0.2</td>\n",
              "      <td>setosa</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>4.7</td>\n",
              "      <td>3.2</td>\n",
              "      <td>1.3</td>\n",
              "      <td>0.2</td>\n",
              "      <td>setosa</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>4.6</td>\n",
              "      <td>3.1</td>\n",
              "      <td>1.5</td>\n",
              "      <td>0.2</td>\n",
              "      <td>setosa</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>5.0</td>\n",
              "      <td>3.6</td>\n",
              "      <td>1.4</td>\n",
              "      <td>0.2</td>\n",
              "      <td>setosa</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "   sepal_length  sepal_width  petal_length  petal_width species  new_label\n",
              "0           5.1          3.5           1.4          0.2  setosa          0\n",
              "1           4.9          3.0           1.4          0.2  setosa          0\n",
              "2           4.7          3.2           1.3          0.2  setosa          0\n",
              "3           4.6          3.1           1.5          0.2  setosa          0\n",
              "4           5.0          3.6           1.4          0.2  setosa          0"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 30
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "FcC2g52seSQ9",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "xfeatures = df[['sepal_length', 'sepal_width', 'petal_length', 'petal_width']]\n",
        "ylabels = df['new_label']"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "HwZvJBtveCUo",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "\n",
        "from sklearn.model_selection import cross_val_score"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "8obJxaD5d1PI",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 776
        },
        "outputId": "033a6026-9388-4d04-afbc-0fe3aaf24605"
      },
      "source": [
        "# Individual Algorithm\n",
        "cv_scores = cross_val_score(LogisticRegression(),xfeatures,ylabels,cv=10)"
      ],
      "execution_count": 33,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "/usr/local/lib/python3.6/dist-packages/sklearn/linear_model/_logistic.py:940: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
            "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
            "\n",
            "Increase the number of iterations (max_iter) or scale the data as shown in:\n",
            "    https://scikit-learn.org/stable/modules/preprocessing.html\n",
            "Please also refer to the documentation for alternative solver options:\n",
            "    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
            "  extra_warning_msg=_LOGISTIC_SOLVER_CONVERGENCE_MSG)\n",
            "/usr/local/lib/python3.6/dist-packages/sklearn/linear_model/_logistic.py:940: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
            "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
            "\n",
            "Increase the number of iterations (max_iter) or scale the data as shown in:\n",
            "    https://scikit-learn.org/stable/modules/preprocessing.html\n",
            "Please also refer to the documentation for alternative solver options:\n",
            "    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
            "  extra_warning_msg=_LOGISTIC_SOLVER_CONVERGENCE_MSG)\n",
            "/usr/local/lib/python3.6/dist-packages/sklearn/linear_model/_logistic.py:940: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
            "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
            "\n",
            "Increase the number of iterations (max_iter) or scale the data as shown in:\n",
            "    https://scikit-learn.org/stable/modules/preprocessing.html\n",
            "Please also refer to the documentation for alternative solver options:\n",
            "    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
            "  extra_warning_msg=_LOGISTIC_SOLVER_CONVERGENCE_MSG)\n",
            "/usr/local/lib/python3.6/dist-packages/sklearn/linear_model/_logistic.py:940: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
            "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
            "\n",
            "Increase the number of iterations (max_iter) or scale the data as shown in:\n",
            "    https://scikit-learn.org/stable/modules/preprocessing.html\n",
            "Please also refer to the documentation for alternative solver options:\n",
            "    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
            "  extra_warning_msg=_LOGISTIC_SOLVER_CONVERGENCE_MSG)\n",
            "/usr/local/lib/python3.6/dist-packages/sklearn/linear_model/_logistic.py:940: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
            "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
            "\n",
            "Increase the number of iterations (max_iter) or scale the data as shown in:\n",
            "    https://scikit-learn.org/stable/modules/preprocessing.html\n",
            "Please also refer to the documentation for alternative solver options:\n",
            "    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
            "  extra_warning_msg=_LOGISTIC_SOLVER_CONVERGENCE_MSG)\n"
          ],
          "name": "stderr"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "EVngkYQpempi",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 54
        },
        "outputId": "17eac0da-1221-464b-840b-20e99a0379b5"
      },
      "source": [
        "cv_scores"
      ],
      "execution_count": 34,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "array([1.        , 0.93333333, 1.        , 1.        , 0.93333333,\n",
              "       0.93333333, 0.93333333, 1.        , 1.        , 1.        ])"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 34
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "OAYvZFZVeyNh",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 35
        },
        "outputId": "83d1fd1d-c683-47e3-d82b-85f72b881a65"
      },
      "source": [
        "print(np.mean(cv_scores))"
      ],
      "execution_count": 35,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "0.9733333333333334\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "3ZUWBQlheqfV",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Individual Algorithm\n",
        "rf_cv_scores = cross_val_score(RandomForestClassifier(),xfeatures,ylabels,cv=10)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "bHgdLvMtfXlq",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 54
        },
        "outputId": "e7975c9e-4e9f-4237-9964-7605ac6ec9d8"
      },
      "source": [
        "rf_cv_scores"
      ],
      "execution_count": 40,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "array([1.        , 0.93333333, 1.        , 0.93333333, 0.93333333,\n",
              "       0.93333333, 0.93333333, 1.        , 1.        , 1.        ])"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 40
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "jEv4N_Gre79y",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 35
        },
        "outputId": "f7236d9a-557f-402b-c62d-1d5b6b111829"
      },
      "source": [
        "print(np.mean(rf_cv_scores))"
      ],
      "execution_count": 37,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "0.9666666666666666\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "ZklmrQL2e__a",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Individual Algorithm\n",
        "rf_cv_scores2 = cross_val_score(RandomForestClassifier(n_estimators=100,max_depth=2),xfeatures,ylabels,cv=10)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "oFDLkw4ZfRtJ",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 54
        },
        "outputId": "46bf1fc2-2d37-4e8f-f9d1-6d57f8428ab4"
      },
      "source": [
        "rf_cv_scores2"
      ],
      "execution_count": 39,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "array([0.93333333, 0.93333333, 1.        , 0.93333333, 0.93333333,\n",
              "       0.93333333, 0.86666667, 1.        , 1.        , 1.        ])"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 39
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "PBey0MJQfRfZ",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 35
        },
        "outputId": "edb91cfb-fa39-41fa-d47e-512bb2bcec00"
      },
      "source": [
        "print(np.mean(rf_cv_scores2))"
      ],
      "execution_count": 41,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "0.9533333333333334\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "RkuPlKqxfRKE",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "### AutoML with TPOT"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "uaJUMBhVfp_I",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 610
        },
        "outputId": "131b7353-e118-491d-f7bf-84994718d98a"
      },
      "source": [
        "!pip install tpot"
      ],
      "execution_count": 42,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Collecting tpot\n",
            "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/37/d8/719024ea20497eb6566ed5cc070e66e8c1e125e0e5d9966837cd00a3a83d/TPOT-0.11.2-py3-none-any.whl (76kB)\n",
            "\r\u001b[K     |████▎                           | 10kB 15.3MB/s eta 0:00:01\r\u001b[K     |████████▋                       | 20kB 3.1MB/s eta 0:00:01\r\u001b[K     |████████████▉                   | 30kB 3.8MB/s eta 0:00:01\r\u001b[K     |█████████████████▏              | 40kB 4.2MB/s eta 0:00:01\r\u001b[K     |█████████████████████▌          | 51kB 3.6MB/s eta 0:00:01\r\u001b[K     |█████████████████████████▊      | 61kB 4.0MB/s eta 0:00:01\r\u001b[K     |██████████████████████████████  | 71kB 4.3MB/s eta 0:00:01\r\u001b[K     |████████████████████████████████| 81kB 3.4MB/s \n",
            "\u001b[?25hCollecting deap>=1.2\n",
            "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/0a/eb/2bd0a32e3ce757fb26264765abbaedd6d4d3640d90219a513aeabd08ee2b/deap-1.3.1-cp36-cp36m-manylinux2010_x86_64.whl (157kB)\n",
            "\r\u001b[K     |██                              | 10kB 19.3MB/s eta 0:00:01\r\u001b[K     |████▏                           | 20kB 26.6MB/s eta 0:00:01\r\u001b[K     |██████▎                         | 30kB 29.3MB/s eta 0:00:01\r\u001b[K     |████████▍                       | 40kB 31.8MB/s eta 0:00:01\r\u001b[K     |██████████▍                     | 51kB 26.1MB/s eta 0:00:01\r\u001b[K     |████████████▌                   | 61kB 28.4MB/s eta 0:00:01\r\u001b[K     |██████████████▋                 | 71kB 25.5MB/s eta 0:00:01\r\u001b[K     |████████████████▊               | 81kB 26.8MB/s eta 0:00:01\r\u001b[K     |██████████████████▊             | 92kB 14.0MB/s eta 0:00:01\r\u001b[K     |████████████████████▉           | 102kB 15.1MB/s eta 0:00:01\r\u001b[K     |███████████████████████         | 112kB 15.1MB/s eta 0:00:01\r\u001b[K     |█████████████████████████       | 122kB 15.1MB/s eta 0:00:01\r\u001b[K     |███████████████████████████▏    | 133kB 15.1MB/s eta 0:00:01\r\u001b[K     |█████████████████████████████▏  | 143kB 15.1MB/s eta 0:00:01\r\u001b[K     |███████████████████████████████▎| 153kB 15.1MB/s eta 0:00:01\r\u001b[K     |████████████████████████████████| 163kB 15.1MB/s \n",
            "\u001b[?25hCollecting stopit>=1.1.1\n",
            "  Downloading https://files.pythonhosted.org/packages/35/58/e8bb0b0fb05baf07bbac1450c447d753da65f9701f551dca79823ce15d50/stopit-1.1.2.tar.gz\n",
            "Requirement already satisfied: joblib>=0.13.2 in /usr/local/lib/python3.6/dist-packages (from tpot) (0.14.1)\n",
            "Requirement already satisfied: pandas>=0.24.2 in /usr/local/lib/python3.6/dist-packages (from tpot) (1.0.3)\n",
            "Requirement already satisfied: tqdm>=4.36.1 in /usr/local/lib/python3.6/dist-packages (from tpot) (4.41.1)\n",
            "Requirement already satisfied: scikit-learn>=0.22.0 in /usr/local/lib/python3.6/dist-packages (from tpot) (0.22.2.post1)\n",
            "Requirement already satisfied: scipy>=1.3.1 in /usr/local/lib/python3.6/dist-packages (from tpot) (1.4.1)\n",
            "Collecting update-checker>=0.16\n",
            "  Downloading https://files.pythonhosted.org/packages/d6/c3/aaf8a162df8e8f9d321237c7c0e63aff95b42d19f1758f96606e3cabb245/update_checker-0.17-py2.py3-none-any.whl\n",
            "Requirement already satisfied: numpy>=1.16.3 in /usr/local/lib/python3.6/dist-packages (from tpot) (1.18.4)\n",
            "Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.6/dist-packages (from pandas>=0.24.2->tpot) (2018.9)\n",
            "Requirement already satisfied: python-dateutil>=2.6.1 in /usr/local/lib/python3.6/dist-packages (from pandas>=0.24.2->tpot) (2.8.1)\n",
            "Requirement already satisfied: requests>=2.3.0 in /usr/local/lib/python3.6/dist-packages (from update-checker>=0.16->tpot) (2.23.0)\n",
            "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.6/dist-packages (from python-dateutil>=2.6.1->pandas>=0.24.2->tpot) (1.12.0)\n",
            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests>=2.3.0->update-checker>=0.16->tpot) (2020.4.5.1)\n",
            "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests>=2.3.0->update-checker>=0.16->tpot) (1.24.3)\n",
            "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests>=2.3.0->update-checker>=0.16->tpot) (3.0.4)\n",
            "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests>=2.3.0->update-checker>=0.16->tpot) (2.9)\n",
            "Building wheels for collected packages: stopit\n",
            "  Building wheel for stopit (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "  Created wheel for stopit: filename=stopit-1.1.2-cp36-none-any.whl size=11956 sha256=0f4ef274cedf5bacc6a4962ec3557d79ab6bae70783af582a93b404e650ccb32\n",
            "  Stored in directory: /root/.cache/pip/wheels/3c/85/2b/2580190404636bfc63e8de3dff629c03bb795021e1983a6cc7\n",
            "Successfully built stopit\n",
            "Installing collected packages: deap, stopit, update-checker, tpot\n",
            "Successfully installed deap-1.3.1 stopit-1.1.2 tpot-0.11.2 update-checker-0.17\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "o5f4lMGffpeH",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "import tpot"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "T1RBBs7df1-c",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 479
        },
        "outputId": "44cfc391-4fd4-4778-fa5f-7754338e6864"
      },
      "source": [
        "# Methods and Attributes\n",
        "dir(tpot)"
      ],
      "execution_count": 44,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "['TPOTClassifier',\n",
              " 'TPOTRegressor',\n",
              " '__builtins__',\n",
              " '__cached__',\n",
              " '__doc__',\n",
              " '__file__',\n",
              " '__loader__',\n",
              " '__name__',\n",
              " '__package__',\n",
              " '__path__',\n",
              " '__spec__',\n",
              " '__version__',\n",
              " '_version',\n",
              " 'base',\n",
              " 'builtins',\n",
              " 'config',\n",
              " 'decorators',\n",
              " 'driver',\n",
              " 'export_utils',\n",
              " 'gp_deap',\n",
              " 'gp_types',\n",
              " 'main',\n",
              " 'metrics',\n",
              " 'operator_utils',\n",
              " 'tpot']"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 44
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "xJ-1Qh1mhhvD",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Split in train and test\n",
        "x_train,x_test,y_train,y_test = train_test_split(xfeatures,ylabels,test_size=0.3,random_state=42)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "lZst9n7ah51u",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Init\n",
        "tpot = TPOTClassifier(generations=5,verbosity=2)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "8-yvAzy3h494",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 314,
          "referenced_widgets": [
            "5d8c9a299f554750aacc6ea6be35b831",
            "c927bd71ab94408a94de40b9620b3db1",
            "5f41c1d011a84f80833ff96b557e43da",
            "a937739ccc00435285e57e5a972b0123",
            "0ec7bf7cb61c44d39847460437dcf86b",
            "85d606d6f120478cafefbc6e36c6e47e",
            "302ec6f52a2041f293e6f316fdc14b65",
            "d802eb0044b843ee9faaaa33877a2b19"
          ]
        },
        "outputId": "63ccbb83-92c8-413a-d34a-ddbf6b92e23d"
      },
      "source": [
        "# Fit data\n",
        "tpot.fit(x_train,y_train)"
      ],
      "execution_count": 51,
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "5d8c9a299f554750aacc6ea6be35b831",
              "version_minor": 0,
              "version_major": 2
            },
            "text/plain": [
              "HBox(children=(FloatProgress(value=0.0, description='Optimization Progress', max=600.0, style=ProgressStyle(de…"
            ]
          },
          "metadata": {
            "tags": []
          }
        },
        {
          "output_type": "stream",
          "text": [
            "Generation 1 - Current best internal CV score: 0.9714285714285713\n",
            "Generation 2 - Current best internal CV score: 0.9714285714285715\n",
            "Generation 3 - Current best internal CV score: 0.9714285714285715\n",
            "Generation 4 - Current best internal CV score: 0.9714285714285715\n",
            "Generation 5 - Current best internal CV score: 0.9714285714285715\n",
            "\n",
            "Best pipeline: KNeighborsClassifier(Nystroem(input_matrix, gamma=0.25, kernel=cosine, n_components=4), n_neighbors=20, p=1, weights=distance)\n"
          ],
          "name": "stdout"
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "TPOTClassifier(config_dict=None, crossover_rate=0.1, cv=5,\n",
              "               disable_update_check=False, early_stop=None, generations=5,\n",
              "               log_file=<ipykernel.iostream.OutStream object at 0x7f05559644a8>,\n",
              "               max_eval_time_mins=5, max_time_mins=None, memory=None,\n",
              "               mutation_rate=0.9, n_jobs=1, offspring_size=None,\n",
              "               periodic_checkpoint_folder=None, population_size=100,\n",
              "               random_state=None, scoring=None, subsample=1.0, template=None,\n",
              "               use_dask=False, verbosity=2, warm_start=False)"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 51
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "RG3IDNdCjjiN",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 35
        },
        "outputId": "0b6388c4-128d-4338-f12d-9d968e97b209"
      },
      "source": [
        "tpot.score(x_test,y_test)"
      ],
      "execution_count": 52,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "1.0"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 52
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "jQc8GFf7jjGT",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Export the result\n",
        "tpot.export('tpot_ml_pipeline.py')"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "c3cesXRmh4So",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Predictions\n",
        "ex = np.array([6.2,3.4,5.4,2.3]).reshape(1,-1)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "rYWXT0FWkLMz",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 35
        },
        "outputId": "b010eb1a-6229-4f07-f205-e8bfbcf0bfa5"
      },
      "source": [
        "tpot.predict(ex)"
      ],
      "execution_count": 57,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "array([2])"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 57
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "a8rLKDPukNZc",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 35
        },
        "outputId": "424b8941-4d08-48e0-db95-73831e4ce10d"
      },
      "source": [
        "d"
      ],
      "execution_count": 58,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "{'setosa': 0, 'versicolor': 1, 'virginica': 2}"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 58
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "p8ln9HSJkchs",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        ""
      ],
      "execution_count": 0,
      "outputs": []
    }
  ]
}